#-------------------------------------------------------#
#   Quality Assessment of the Academic Freedom Index    #
#-------------------------------------------------------#

# Title: "Quality Assessment of the Academic Freedom Index: Strengths, Weaknesses, and How Best to Use It" 
# Authors: "Lott, Lars", "Spannagel, Janika"
# date: 2024-09-19
# journal: Perspectives on Politics
# DOI: TBA
# written under "R and RStudio (4.4.1)"

#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# load packages
library(here)
library(tidyverse)
library(ggpubr)
library(countrycode)
library(ggrepel)
library(ggthemes)
library(lubridate)
library(dotwhisker)
library(estimatr)
library(texreg)
library(readxl)
library(haven)
#library(vutils)
library(rstan)
library(truncnorm)
library(abind)
library(matrixStats)

#---------------------------------------------------#
#--------------------load data ---------------------#
#---------------------------------------------------#

#load coder-level data 
coder_level_ds <- read.csv(file.path("data/vdem_v13",  "vdem_13_coder_level", "coder_level_ds_v13.csv"))
gc()

#load coder characteristics
coder_characteristics <- readRDS(file.path("data/coder_characteristics_wide.rds"))

# load country_aggregated data
vdem_full_v13 <- readRDS(file.path("data/vdem_v13", "vdem_13_cy", "V-Dem-CY-Full+Others-v13.rds"))

#---------------------------------------------------#
#-------------Predicting Respondent perceptions ----#
#---------------------------------------------------#

### Load posteriors

v2cafexch <- readRDS("data/posteriors/v13/v2cafexch_posterior.rds") # freedom of academic exchange and dissemination
v2cafres <- readRDS("data/posteriors/v13/v2cafres_posterior.rds") # freedom to research and teach
v2cainsaut <- readRDS("data/posteriors/v13/v2cainsaut_posterior.rds") # institutional autonomy
v2casurv <- readRDS("data/posteriors/v13/v2casurv_posterior.rds") #campus integrity
v2clacfree <- readRDS("data/posteriors/v13/v2clacfree_posterior.rds") # freedom of academic and cultural expression

#### First step: Calculate perception estimates for raters ####

vnames <- c("v2cafexch", "v2cafres", "v2cainsaut",
            "v2casurv", "v2clacfree")

v2cafexch$posterior

#### Create a function to create rater perceptions for each variable ####

## Perceptions have the conditional distribution N(Z_i, \sigma_r^2), truncated to
## \gamma_{j,{y_{ir}-1, y_{ir}}}.
## \sigma_r^2 = (1/beta_r)^2 
## \gamma_{r,k} = \tau_{r,k} / \sigma_r
## i is the country-year, r is the rater
## truncates min(beta) at 0.07 to get rid of really annoying edge cases

gen.perceptions <- function (name, thin=4, minbeta=0.07) {
  post <- rstan::extract(get(name)$posterior)
  
  coder.table <- get(name)$post.sample$gamma %>%
    rownames %>% 
    grep("1[.]", ., value = TRUE) %>%
    gsub("1[.]", "", .) %>%
    data.frame(
      coder_id = as.numeric(.), 
      rater_id = seq_along(.), 
      row.names = NULL,
      stringsAsFactors = FALSE)

  y <- get(name)$model_input$y
  r <- get(name)$model_input$j_id
  i <- get(name)$model_input$sy_id
  post$beta[post$beta < minbeta] <- minbeta
  sigma <- 1/post$beta
  tau <- array(apply(post$gamma, 3, function (x) x * sigma), dim=dim(post$gamma))
  tau <- abind(matrix(-Inf, nrow=nrow(sigma), ncol=ncol(sigma)),
               tau,
               matrix(Inf, nrow=nrow(sigma), ncol=ncol(sigma)), along=3)
  cid <-  sapply(r, function (x) coder.table$coder_id[coder.table$rater_id == x])
  pcept <- sapply(seq(1, nrow(post$Z), thin), function (sim) {
    tau.obs <- sapply(1:length(y), function (obs) tau[sim, r[obs], y[obs]])
    taup1.obs <- sapply(1:length(y), function(obs) tau[sim, r[obs], y[obs]+1])
    rtruncnorm(length(y), tau.obs, taup1.obs,
               post$Z[sim,i], (sigma[sim, r])^2)
  })
  rownames(pcept) <- paste(cid, i, sep = "_")
  pcept
}

################################################################################
#### Calculate Coder perceptions of country_dates and extract coder Perceptions per variable ####
################################################################################

#write function for perception mean 

gen.perception.mean <- function(name) {
  i <- sapply(paste(get(name), "perceptions", sep="."), function (var)
    apply(get(var), 1, mean))
  return(i)
}


#### v2cafexch ####

v2cafexch.perceptions <- gen.perceptions("v2cafexch")

v2cafexch.perceptionsmean <- sapply(paste("v2cafexch", "perceptions", sep="."), function (var)
  apply(get(var), 1, mean))

v2cafexch.perceptionssd <- sapply(paste("v2cafexch", "perceptions", sep="."), function (var)
  apply(get(var), 1, sd))

v2cafexch.perceptionsmean <- as.data.frame(v2cafexch.perceptionsmean)
v2cafexch.perceptionsmean <- tibble::rownames_to_column(v2cafexch.perceptionsmean, "id_var")

v2cafexch.perceptionssd <- as.data.frame(v2cafexch.perceptionssd)
v2cafexch.perceptionssd <- tibble::rownames_to_column(v2cafexch.perceptionssd, "id_var")

v2cafexch.perceptionsmean %<>% rename(v2cafexch_perceptmean = v2cafexch.perceptions)
v2cafexch.perceptionssd %<>% rename(v2cafexch_perceptsd = v2cafexch.perceptions)

v2cafexch.perceptions <- v2cafexch.perceptionsmean %>%
  left_join(v2cafexch.perceptionssd, by = "id_var")
  
v2cafexch.perceptions$country_date_id <- as.numeric(map(str_split(v2cafexch.perceptions$id_var, "_"), 2))
v2cafexch.perceptions$coder_id <- as.numeric(map(str_split(v2cafexch.perceptions$id_var, "_"), 1))
v2cafexch.perceptions <- v2cafexch.perceptions %>%
  dplyr::select(id_var, country_date_id, coder_id, everything())

#### v2cafres #####

v2cafres.perceptions <- gen.perceptions("v2cafres")

v2cafres.perceptionsmean <- sapply(paste("v2cafres", "perceptions", sep="."), function (var)
  apply(get(var), 1, mean))

v2cafres.perceptionssd <- sapply(paste("v2cafres", "perceptions", sep="."), function (var)
  apply(get(var), 1, sd))

v2cafres.perceptionsmean <- as.data.frame(v2cafres.perceptionsmean)
v2cafres.perceptionsmean <- tibble::rownames_to_column(v2cafres.perceptionsmean, "id_var")

v2cafres.perceptionssd <- as.data.frame(v2cafres.perceptionssd)
v2cafres.perceptionssd <- tibble::rownames_to_column(v2cafres.perceptionssd, "id_var")

v2cafres.perceptionsmean %<>% rename(v2cafres_perceptmean = v2cafres.perceptions)
v2cafres.perceptionssd %<>% rename(v2cafres_perceptsd = v2cafres.perceptions)

v2cafres.perceptions <- v2cafres.perceptionsmean %>%
  left_join(v2cafres.perceptionssd, by = "id_var")

v2cafres.perceptions$country_date_id <- as.numeric(map(str_split(v2cafres.perceptions$id_var, "_"), 2))
v2cafres.perceptions$coder_id <- as.numeric(map(str_split(v2cafres.perceptions$id_var, "_"), 1))
v2cafres.perceptions <- v2cafres.perceptions %>%
  dplyr::select(id_var, country_date_id, coder_id, everything())

#### v2cainsaut#####

v2cainsaut.perceptions <- gen.perceptions("v2cainsaut")

v2cainsaut.perceptionsmean <- sapply(paste("v2cainsaut", "perceptions", sep="."), function (var)
  apply(get(var), 1, mean))

v2cainsaut.perceptionssd <- sapply(paste("v2cainsaut", "perceptions", sep="."), function (var)
  apply(get(var), 1, sd))

v2cainsaut.perceptionsmean <- as.data.frame(v2cainsaut.perceptionsmean)
v2cainsaut.perceptionsmean <- tibble::rownames_to_column(v2cainsaut.perceptionsmean, "id_var")

v2cainsaut.perceptionssd <- as.data.frame(v2cainsaut.perceptionssd)
v2cainsaut.perceptionssd <- tibble::rownames_to_column(v2cainsaut.perceptionssd, "id_var")

v2cainsaut.perceptionsmean %<>% rename(v2cainsaut_perceptmean = v2cainsaut.perceptions)
v2cainsaut.perceptionssd %<>% rename(v2cainsaut_perceptsd = v2cainsaut.perceptions)

v2cainsaut.perceptions <- v2cainsaut.perceptionsmean %>%
  left_join(v2cainsaut.perceptionssd, by = "id_var")

v2cainsaut.perceptions$country_date_id <- as.numeric(map(str_split(v2cainsaut.perceptions$id_var, "_"), 2))
v2cainsaut.perceptions$coder_id <- as.numeric(map(str_split(v2cainsaut.perceptions$id_var, "_"), 1))
v2cainsaut.perceptions <- v2cainsaut.perceptions %>%
  dplyr::select(id_var, country_date_id, coder_id, everything())

#### v2casurv #####

v2casurv.perceptions <- gen.perceptions("v2casurv")

v2casurv.perceptionsmean <- sapply(paste("v2casurv", "perceptions", sep="."), function (var)
  apply(get(var), 1, mean))

v2casurv.perceptionssd <- sapply(paste("v2casurv", "perceptions", sep="."), function (var)
  apply(get(var), 1, sd))

v2casurv.perceptionsmean <- as.data.frame(v2casurv.perceptionsmean)
v2casurv.perceptionsmean <- tibble::rownames_to_column(v2casurv.perceptionsmean, "id_var")

v2casurv.perceptionssd <- as.data.frame(v2casurv.perceptionssd)
v2casurv.perceptionssd <- tibble::rownames_to_column(v2casurv.perceptionssd, "id_var")

v2casurv.perceptionsmean %<>% rename(v2casurv_perceptmean = v2casurv.perceptions)
v2casurv.perceptionssd %<>% rename(v2casurv_perceptsd = v2casurv.perceptions)

v2casurv.perceptions <- v2casurv.perceptionsmean %>%
  left_join(v2casurv.perceptionssd, by = "id_var")

v2casurv.perceptions$country_date_id <- as.numeric(map(str_split(v2casurv.perceptions$id_var, "_"), 2))
v2casurv.perceptions$coder_id <- as.numeric(map(str_split(v2casurv.perceptions$id_var, "_"), 1))
v2casurv.perceptions <- v2casurv.perceptions %>%
  dplyr::select(id_var, country_date_id, coder_id, everything())


#### v2clacfree #####

v2clacfree.perceptions <- gen.perceptions("v2clacfree")

v2clacfree.perceptionsmean <- sapply(paste("v2clacfree", "perceptions", sep="."), function (var)
  apply(get(var), 1, mean))

v2clacfree.perceptionssd <- sapply(paste("v2clacfree", "perceptions", sep="."), function (var)
  apply(get(var), 1, sd))

v2clacfree.perceptionsmean <- as.data.frame(v2clacfree.perceptionsmean)
v2clacfree.perceptionsmean <- tibble::rownames_to_column(v2clacfree.perceptionsmean, "id_var")

v2clacfree.perceptionssd <- as.data.frame(v2clacfree.perceptionssd)
v2clacfree.perceptionssd <- tibble::rownames_to_column(v2clacfree.perceptionssd, "id_var")

v2clacfree.perceptionsmean %<>% rename(v2clacfree_perceptmean = v2clacfree.perceptions)
v2clacfree.perceptionssd %<>% rename(v2clacfree_perceptsd = v2clacfree.perceptions)

v2clacfree.perceptions <- v2clacfree.perceptionsmean %>%
  left_join(v2clacfree.perceptionssd, by = "id_var")

v2clacfree.perceptions$country_date_id <- as.numeric(map(str_split(v2clacfree.perceptions$id_var, "_"), 2))
v2clacfree.perceptions$coder_id <- as.numeric(map(str_split(v2clacfree.perceptions$id_var, "_"), 1))
v2clacfree.perceptions <- v2clacfree.perceptions %>%
  dplyr::select(id_var, country_date_id, coder_id, everything())


################################################################################
#### Merge var_perceptions data with coder_level_ds                         ####
################################################################################

v2cafexch.country_date_translation <- v2cafexch$country_date_translation
v2cafres.country_date_translation <- v2cafres$country_date_translation
v2cainsaut.country_date_translation <- v2cainsaut$country_date_translation
v2casurv.country_date_translation <- v2casurv$country_date_translation
v2clacfree.country_date_translation <- v2clacfree$country_date_translation

rm(list = ls()[grep("perceptionsmean", ls())])
rm(list = ls()[grep("perceptionssd", ls())])

v2cafexch.perceptions %<>%
  left_join(v2cafexch.country_date_translation, by = "country_date_id") %>%
  dplyr::select(coder_id, country_date, starts_with("v2ca")) %>%
  mutate(country_text_id = as.character(map(str_split(country_date, " "), 1)), 
         historical_date = as.character(map(str_split(country_date, " "), 2))) %>%
  dplyr::select(-country_date)

v2cafres.perceptions %<>%
  left_join(v2cafres.country_date_translation, by = "country_date_id") %>%
  dplyr::select(coder_id, country_date, starts_with("v2ca")) %>%
  mutate(country_text_id = as.character(map(str_split(country_date, " "), 1)), 
         historical_date = as.character(map(str_split(country_date, " "), 2))) %>%
  dplyr::select(-country_date)


v2cainsaut.perceptions %<>%
  left_join(v2cainsaut.country_date_translation, by = "country_date_id") %>%
  dplyr::select(coder_id, country_date, starts_with("v2ca")) %>%
  mutate(country_text_id = as.character(map(str_split(country_date, " "), 1)), 
         historical_date = as.character(map(str_split(country_date, " "), 2))) %>%
  dplyr::select(-country_date)


v2casurv.perceptions %<>%
  left_join(v2casurv.country_date_translation, by = "country_date_id") %>%
  dplyr::select(coder_id, country_date, starts_with("v2ca")) %>%
  mutate(country_text_id = as.character(map(str_split(country_date, " "), 1)), 
         historical_date = as.character(map(str_split(country_date, " "), 2))) %>%
  dplyr::select(-country_date)


v2clacfree.perceptions %<>%
  left_join(v2clacfree.country_date_translation, by = "country_date_id") %>%
  dplyr::select(coder_id, country_date, starts_with("v2cl")) %>%
  mutate(country_text_id = as.character(map(str_split(country_date, " "), 1)), 
         historical_date = as.character(map(str_split(country_date, " "), 2))) %>%
  dplyr::select(-country_date)

coder_level_ds <- coder_level_ds %>%
  left_join(v2cafexch.perceptions, by = c("coder_id", "country_text_id", "historical_date")) %>%
  left_join(v2cafres.perceptions, by = c("coder_id", "country_text_id", "historical_date")) %>%
  left_join(v2cainsaut.perceptions, by = c("coder_id", "country_text_id", "historical_date")) %>%
  left_join(v2casurv.perceptions, by = c("coder_id", "country_text_id", "historical_date")) %>%
  left_join(v2clacfree.perceptions, by = c("coder_id", "country_text_id", "historical_date")) %>%
  dplyr::select(country_text_id, country_id, historical_date, coder_id, starts_with("v2cafexch"), starts_with("v2cafres"), 
                starts_with("v2cainsaut"), starts_with("v2casurv"), starts_with("v2clacfree"), starts_with("v2zz")) 

coder_level_ds %<>%
  group_by(coder_id, country_text_id)%>%
  fill(v2cafexch_perceptmean, .direction = "down") %>%
  fill(v2cafexch_perceptsd, .direction = "down") %>%
  fill(v2cafres_perceptmean, .direction = "down") %>%
  fill(v2cafres_perceptsd, .direction = "down") %>%
  fill(v2cainsaut_perceptmean, .direction = "down") %>%
  fill(v2cainsaut_perceptsd, .direction = "down") %>%
  fill(v2casurv_perceptmean, .direction = "down") %>%
  fill(v2casurv_perceptsd, .direction = "down") %>%
  fill(v2clacfree_perceptmean, .direction = "down") %>%
  fill(v2clacfree_perceptsd, .direction = "down") %>%
  mutate(v2cafexch_perceptmean = ifelse(is.na(v2cafexch), NA, v2cafexch_perceptmean), 
         v2cafexch_perceptsd = ifelse(is.na(v2cafexch), NA, v2cafexch_perceptsd), 
         v2cafres_perceptmean = ifelse(is.na(v2cafres), NA, v2cafres_perceptmean), 
         v2cafres_perceptsd = ifelse(is.na(v2cafres), NA, v2cafres_perceptsd), 
         v2cainsaut_perceptmean = ifelse(is.na(v2cainsaut), NA, v2cainsaut_perceptmean), 
         v2cainsaut_perceptsd = ifelse(is.na(v2cainsaut), NA, v2cainsaut_perceptsd), 
         v2casurv_perceptmean = ifelse(is.na(v2casurv), NA, v2casurv_perceptmean), 
         v2casurv_perceptsd = ifelse(is.na(v2casurv), NA, v2casurv_perceptsd), 
         v2clacfree_perceptmean = ifelse(is.na(v2clacfree), NA, v2clacfree_perceptmean), 
         v2clacfree_perceptsd = ifelse(is.na(v2clacfree), NA, v2clacfree_perceptsd))


################################################################################
################################################################################

#### Generate Beta (expert reliability scores) ####

## v2cafexch ##

v2cafexch.beta <- v2cafexch$post.sample$beta

v2cafexch.beta.mean <- rowMeans(v2cafexch.beta)
v2cafexch.beta.sd <- rowSds(v2cafexch.beta)
v2cafexch.coder_id <- rownames(v2cafexch.beta)


v2cafexch.beta <- data.frame(
  coder_id = as.integer(v2cafexch.coder_id), 
  v2cafexch_betamean = v2cafexch.beta.mean, 
  v2cafexch_betasd = v2cafexch.beta.sd
)

## v2cafres ##

v2cafres.beta <- v2cafres$post.sample$beta

v2cafres.beta.mean <- rowMeans(v2cafres.beta)
v2cafres.beta.sd <- rowSds(v2cafres.beta)
v2cafres.coder_id <- rownames(v2cafres.beta)


v2cafres.beta <- data.frame(
  coder_id = as.integer(v2cafres.coder_id), 
  v2cafres_betamean = v2cafres.beta.mean, 
  v2cafres_betasd = v2cafres.beta.sd
)


## v2cainsaut ##

v2cainsaut.beta <- v2cainsaut$post.sample$beta

v2cainsaut.beta.mean <- rowMeans(v2cainsaut.beta)
v2cainsaut.beta.sd <- rowSds(v2cainsaut.beta)
v2cainsaut.coder_id <- rownames(v2cainsaut.beta)


v2cainsaut.beta <- data.frame(
  coder_id = as.integer(v2cainsaut.coder_id), 
  v2cainsaut_betamean = v2cainsaut.beta.mean, 
  v2cainsaut_betasd = v2cainsaut.beta.sd
)

## v2casurv ##

v2casurv.beta <- v2casurv$post.sample$beta

v2casurv.beta.mean <- rowMeans(v2casurv.beta)
v2casurv.beta.sd <- rowSds(v2casurv.beta)
v2casurv.coder_id <- rownames(v2casurv.beta)


v2casurv.beta <- data.frame(
  coder_id = as.integer(v2casurv.coder_id), 
  v2casurv_betamean = v2casurv.beta.mean, 
  v2casurv_betasd = v2casurv.beta.sd
)


## v2clacfree ##


v2clacfree.beta <-v2clacfree$post.sample$beta

v2clacfree.beta.mean <- rowMeans(v2clacfree.beta)
v2clacfree.beta.sd <- rowSds(v2clacfree.beta)
v2clacfree.coder_id <- rownames(v2clacfree.beta)


v2clacfree.beta <- data.frame(
  coder_id = as.integer(v2clacfree.coder_id), 
  v2clacfree_betamean = v2clacfree.beta.mean, 
  v2clacfree_betasd = v2clacfree.beta.sd
)

#### Merge Beta Mean and Beta SD to coder-level ds ####

coder_level_ds <- coder_level_ds %>%
  left_join(v2cafexch.beta, by = c("coder_id")) %>%
  left_join(v2cafres.beta, by = c("coder_id")) %>%
  left_join(v2cainsaut.beta, by = c("coder_id")) %>%
  left_join(v2casurv.beta, by = c("coder_id")) %>%
  left_join(v2clacfree.beta, by = c("coder_id")) %>%
  mutate(v2clacfree_betamean = ifelse(is.na(v2clacfree), NA, v2clacfree_betamean),
         v2clacfree_betasd = ifelse(is.na(v2clacfree), NA, v2clacfree_betasd),
         v2casurv_betamean = ifelse(is.na(v2casurv), NA, v2casurv_betamean),
         v2casurv_betasd = ifelse(is.na(v2casurv), NA, v2casurv_betasd),
         v2cainsaut_betamean = ifelse(is.na(v2cainsaut), NA, v2cainsaut_betamean),
         v2cainsaut_betamean = ifelse(is.na(v2cainsaut), NA, v2cainsaut_betasd),
         v2cafres_betamean = ifelse(is.na(v2cafres), NA, v2cafres_betamean),
         v2cafres_betasd = ifelse(is.na(v2cafres), NA, v2cafres_betasd),
         v2cafexch_betamean = ifelse(is.na(v2cafexch), NA, v2cafexch_betamean),
         v2cafexch_betasd = ifelse(is.na(v2cafexch), NA, v2cafexch_betasd))
  
saveRDS(coder_level_ds, "data/coder_level_ds_perceptions.rds")

rm(list = ls()[grep(".beta", ls())])


